R/categorical ranges.R

Defines functions cat_num

Documented in cat_num

#' Append a binned range label for a numeric column.
#'
#' The values of `num_col` are binned with [base::cut()] using `bins` as the
#' number of intervals. Each interval is then relabelled as `"lower-upper"`,
#' with both bounds rounded to two decimal places.
#'
#' @param data A data frame containing the data.
#' @param num_col Unquoted name of the column holding the numeric values to
#'   be binned and labelled.
#' @param bins Number of bins used to assign the labels. Must be a single
#'   number that is at least 2. Defaults to 4.
#' @return `data` with one extra factor column named `label_<num_col>`
#'   appended. Factor levels follow ascending bin order, bins without any
#'   observation are not kept as levels, and missing values in `num_col`
#'   stay `NA`.
#' @import dplyr
#' @examples
#'
#' # Create some data
#' samples <- data.frame(samp = sample(1:1000, 500, replace = TRUE))
#'
#' cat_num(samples, samp)
#'
#' # Use with dplyr
#'
#' library(dplyr)
#' samples %>% cat_num(samp)
#'
#' @export
cat_num <- function(data, num_col, bins = 4) {
    if (!is.numeric(bins) || length(bins) != 1L || is.na(bins)) {
        stop("value for bins must be a single number", call. = FALSE)
    }
    if (bins < 2) {
        stop("value for bins cannot be less than 2", call. = FALSE)
    }

    # Capture the column name for the output label before quoting the argument.
    col_name <- deparse(substitute(num_col))
    num_col <- dplyr::enquo(num_col)

    # Bin first: the factor returned by cut() keeps its levels in bin order.
    binned <- dplyr::mutate(data, label = cut(!!num_col, breaks = bins))
    binned <- binned[["label"]]

    # Translate each "(lower,upper]" level into a "lower-upper" label. Working
    # on the levels (not the values) preserves bin order and keeps NA as NA
    # instead of producing an "NA-NA" label.
    bin_levels <- levels(binned)
    lower <- round(as.numeric(sub("\\((.+),.*", "\\1", bin_levels)), 2)
    upper <- round(as.numeric(sub("[^,]*,([^]]*)\\]", "\\1", bin_levels)), 2)
    range_labels <- paste0(lower, "-", upper)

    # unique() guards against two bins rounding to the same label;
    # droplevels() keeps only the bins that actually occur in the data.
    labels <- factor(
        range_labels[as.integer(binned)],
        levels = unique(range_labels)
    )
    labels <- droplevels(labels)

    label_col <- data.frame(labels)
    names(label_col) <- paste0("label_", col_name)

    cbind(data, label_col)
}
towananalytics/tatools documentation built on Jan. 24, 2022, 7:31 p.m.